## 'data.frame': 2800 obs. of 29 variables:
## $ Age : num 41 23 46 70 70 18 59 80 66 68 ...
## $ Sex : num 0 0 1 0 0 0 0 0 0 1 ...
## $ On Thyroxine : num 0 0 0 1 0 1 0 0 0 0 ...
## $ Query on Thyroxine: num 0 0 0 0 0 0 0 0 0 0 ...
## $ Antithyroid Med : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Sick : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Pregnant : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Thyroid Surgery : num 0 0 0 0 0 0 0 0 0 0 ...
## $ I131 treatment : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Query Hypothyroid : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Query Hyperthyroid: num 0 0 0 0 0 0 0 0 0 0 ...
## $ Lithium : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Goitre : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Tumor : num 0 0 0 0 0 0 0 0 1 0 ...
## $ Hypopituitary : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Psych : num 0 0 0 0 0 0 0 0 0 0 ...
## $ TSH measured : num 1 1 1 1 1 1 0 1 1 1 ...
## $ TSH : num 1.3 4.1 0.98 0.16 0.72 0.03 NA 2.2 0.6 2.4 ...
## $ T3 measured : num 1 1 0 1 1 0 0 1 1 1 ...
## $ T3 : num 2.5 2 NA 1.9 1.2 NA NA 0.6 2.2 1.6 ...
## $ TT4 measured : num 1 1 1 1 1 1 1 1 1 1 ...
## $ TT4 : num 125 102 109 175 61 183 72 80 123 83 ...
## $ T4U measured : num 1 0 1 0 1 1 1 1 1 1 ...
## $ T4U : num 1.14 NA 0.91 NA 0.87 1.3 0.92 0.7 0.93 0.89 ...
## $ FTI measured : num 1 0 1 0 1 1 1 1 1 1 ...
## $ FTI : num 109 NA 120 NA 70 141 78 115 132 93 ...
## $ referral sourse : chr "SVHC" "other" "other" "other" ...
## $ State : chr "negative" "negative" "negative" "negative" ...
## $ ID : num 3733 1442 2965 806 2807 ...
## Loading required package: ggplot2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
Eliminamos Hypopituitary porque solo tiene un valor distinto de 0.
# Columns treated as candidate causes. Each one is passed through `fun_range`
# (defined elsewhere -- presumably a range/min-max rescaling, confirm) and the
# result is written back into `data` in place.
# NOTE(review): assumes `data` is a plain data.frame and that `fun_range`
# returns a vector of the same length as its input -- confirm.
Causes <- c("Pregnant", "Goitre", "Tumor", "Psych")
data[Causes] <- lapply(data[Causes], fun_range)
# Mean of each candidate cause column, ignoring NAs, computed separately on
# `data_p` and `data_n` (both defined elsewhere; presumably the positive and
# negative `State` subsets -- confirm).
# USE.NAMES = FALSE keeps the results unnamed so that data.frame() below gets
# default integer row names.
means_cp <- vapply(
  c("Pregnant", "Goitre", "Tumor", "Psych"),
  function(col) mean(data_p[[col]], na.rm = TRUE),
  numeric(1),
  USE.NAMES = FALSE
)
means_cn <- vapply(
  c("Pregnant", "Goitre", "Tumor", "Psych"),
  function(col) mean(data_n[[col]], na.rm = TRUE),
  numeric(1),
  USE.NAMES = FALSE
)

# Long-format table: the `data_p` means come first ("+" labels), followed by
# the `data_n` means ("-" labels), in the same column order.
values_means_c <- c(means_cp, means_cn)
names_means_c <- c(
  "+ Pregnant", "+ Goitre", "+ Tumor", "+ Psych",
  "- Pregnant", "- Goitre", "- Tumor", "- Psych"
)
means_c <- data.frame(names_means_c, values_means_c)
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## Attaching package: 'PerformanceAnalytics'
## The following object is masked from 'package:graphics':
##
## legend
Consideramos las 6 variables más correlacionadas con la variable objetivo State:
T4U (0.46), Pregnant (0.35), T3 (0.34), TT4 (0.26), Age (0.12), Sex (0.1).